import numpy as np    # 导入 numpy库，用于科学计算
import pandas as pd    # 导入 pandas库 ，用于数据分析
import matplotlib.pyplot as plt   # 导入 matplotlib库 ，用于数据可视化

%matplotlib inline 
plt.style.use("ggplot")  # 使用ggplot绘图风格
from sklearn.linear_model import LinearRegression  # 导入线性回归工具函数 LinearRegression
# Sample data for the single-variable regression example.
x = np.array([143, 145, 146, 147, 149, 150, 153, 154, 155,
              156, 157, 158, 159, 160, 162, 164])  # input x values
# Turn the 1-D vector into a single-column 2-D array. Using -1 lets NumPy infer
# the row count, so the line keeps working if samples are added or removed.
x = x.reshape(-1, 1)
y = np.array([88, 85, 88, 91, 92, 93, 93, 95, 96,
              98, 97, 96, 98, 99, 100, 102])  # input y values
plt.scatter(x, y)    # scatter plot of the raw (x, y) samples
plt.xlabel(r'$H$')  # set the x-axis label
plt.ylabel(r'$L$')  # set the y-axis label

Text(0, 0.5, '$L$')


# 1. Create the linear regression estimator and name it lrModel
lrModel = LinearRegression()
# 2. Fit lrModel to the data x, y
lrModel.fit(x,y)

LinearRegression()


# 3. Print the fitted intercept and slope
print(lrModel.intercept_)
print(lrModel.coef_)

-16.07298072980727
[0.71935219]


# 4. Draw the fitted regression line together with the scatter plot.
# The line spans the observed range of x instead of a hard-coded interval, so it
# covers every sample (the smallest x is 143, below the previous start of 145).
# A straight line is fully determined by its two end points.
line_x = np.array([[x.min()], [x.max()]])
LR_data = lrModel.predict(line_x)  # model output at both ends of the line
plt.plot(line_x[:, 0], LR_data)  # regression line
plt.scatter(x, y)   # raw samples

<matplotlib.collections.PathCollection at 0x28149e849a0>


# 5. Compute R2 on the same x, y the model was fitted to
lrModel.score(x,y)

0.928187845952738


# 6. Predict for new inputs (170 and 163), passed as one row per sample
lrModel.predict([[170],[163]])

array([106.21689217, 101.18142681])


import pandas as pd # 导入 pandas库 ，用于数据分析
data = pd.read_csv('data/prediction_model/so2.csv') # read the data set
print(data) # display the data

    SO2(ppm)    R    G    B    S    H
0          0  153  148  157  138   14
1          0  153  147  157  138   16
2          0  153  146  158  137   20
3          0  153  146  158  137   20
4          0  154  145  157  141   19
5         20  144  115  170  135   82
6         20  144  115  169  136   81
7         20  145  115  172  135   83
8         30  145  114  174  135   87
9         30  145  114  176  135   89
10        30  145  114  175  135   89
11        30  146  114  175  135   88
12        50  142   99  175  137  110
13        50  141   99  174  137  109
14        50  142   99  176  136  110
15        80  141   96  181  135  119
16        80  141   96  182  135  119
17        80  140   96  182  135  120
18       100  139   96  175  136  115
19       100  139   96  174  136  114
20       100  139   96  176  136  116
21       150  139   86  178  136  131
22       150  139   87  177  137  129
23       150  138   86  177  137  130
24       150  139   86  178  137  131


# Measurements as plain lists, one entry per sample; position i in every list
# belongs to the same sample. The values coincide with the columns of the
# so2.csv table printed above (SO2(ppm), R, G, B, S, H).
So2 = [0, 0, 0, 0, 0, 20, 20, 20, 30, 30, 30, 30, 50, 50, 50, 80, 80, 80, 100, 100, 100, 150, 150, 150, 150]
R = [153, 153, 153, 153, 154, 144, 144, 145, 145, 145, 145, 146, 142, 141, 142, 141, 141, 140, 139, 139, 139, 139, 139, 138, 139]
G = [148, 147, 146, 146, 145, 115, 115, 115, 114, 114, 114, 114, 99, 99, 99, 96, 96, 96, 96, 96, 96, 86, 87, 86, 86]
B = [157, 157, 158, 158, 157, 170, 169, 172, 174, 176, 175, 175, 175, 174, 176, 181, 182, 182, 175, 174, 176, 178, 177, 177, 178]
S = [138, 138, 137, 137, 141, 135, 136, 135, 135, 135, 135, 135, 137, 137, 136, 135, 135, 135, 136, 136, 136, 136, 137, 137, 137]
H = [14, 16, 20, 20, 19, 82, 81, 83, 87, 89, 89, 88, 110, 109, 110, 119, 119, 120, 115, 114, 116, 131, 129, 130, 131]


import seaborn as sns  # 导入 seaborn库 ，用于数据可视化
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
# One regression panel per feature (R, G, B, S, H) against the SO2 values.
colors = ['r', 'g', 'b', 'm', 'black']  # colour of each panel
xlist = ['R', 'G', 'B', 'S', 'H']  # feature name shown in each panel title
feature_values = [R, G, B, S, H]  # feature data, in the same order as xlist
plt.figure(figsize=(20, 13))  # figure size

# Every panel follows the same recipe, so loop over the features instead of
# repeating the stanza five times.
for panel, (name, values, color) in enumerate(zip(xlist, feature_values, colors), start=1):
    plt.subplot(2, 3, panel)  # 2x3 grid; the sixth cell is left empty
    # Scatter plot with a fitted regression line and its confidence interval.
    sns.regplot(x=values, y=So2, color=color)
    # Off-diagonal entry of the 2x2 correlation matrix: the correlation
    # coefficient between this feature and So2.
    title_text = '{} = {:.2f}'.format(name, np.corrcoef(values, So2)[1][0])
    plt.title(title_text)  # panel title

Text(0.5, 1.0, 'H = 0.83')


from sklearn.linear_model import LinearRegression  # 导入线性回归函数LinearRegression
# Multiple linear regression of So2 on the R, G, B and H readings.
lrModel = LinearRegression()     # create the regression model
# One row per sample, one column per feature. Built once and reused for both
# fitting and scoring instead of being constructed twice.
design_matrix = np.array([R, G, B, H]).T
lrModel.fit(design_matrix, So2)  # fit the model
print('截距为：', lrModel.intercept_)  # print the intercept
print('系数为：', lrModel.coef_)    # print one coefficient per feature column
score = lrModel.score(design_matrix, So2)
# score() returns the coefficient of determination R^2, not a correlation coefficient.
print('R2为：', score)

截距为： 2044.0215932049725
系数为： [ -1.38977229 -17.5022507    5.68341432  -9.34216398]
R2为： 0.896131817285016


# Load the sample data
x = [4, 8, 12, 25, 32, 43, 58, 63, 69, 79] # x values
y = [20, 33, 50, 56, 42, 31, 33, 46, 65, 75]  # y values
plt.scatter(x, y)  # scatter plot of the samples

<matplotlib.collections.PathCollection at 0x2814ad66df0>


# Load the sample data (same values as in the scatter-plot cell above)
x = [4, 8, 12, 25, 32, 43, 58, 63, 69, 79]
y = [20, 33, 50, 56, 42, 31, 33, 46, 65, 75]
from scipy.optimize import curve_fit  # 导入非线性拟合函数curve_fit

# Model to fit: the general quadratic y = a*x^2 + b*x + c
def f2(x, a, b, c):
    """Evaluate the quadratic a*x**2 + b*x + c at x.

    a, b and c are the coefficients that curve_fit estimates.
    """
    quadratic_term = a * x**2
    linear_term = b * x
    return quadratic_term + linear_term + c


# Quadratic fit of the sample data, drawn over the raw points.
plt.scatter(x, y)  # raw samples
# popt holds the estimated parameters (a, b, c); pcov is their covariance.
popt, pcov = curve_fit(f2, x, y)
# Evaluate the fitted quadratic at every sample x.
y1 = [f2(xi, *popt) for xi in x]
plt.plot(x, y1, 'r')   # fitted curve in red

[<matplotlib.lines.Line2D at 0x2814a7e8460>]


# f3: cubic polynomial
def f3(x, a, b, c, d):
    """Evaluate a*x**3 + b*x**2 + c*x + d at x."""
    cubic_term = a * x**3
    quadratic_term = b * x**2
    linear_term = c * x
    return cubic_term + quadratic_term + linear_term + d
# f4: quartic polynomial
def f4(x, a, b, c, d, e):
    """Evaluate a*x**4 + b*x**3 + c*x**2 + d*x + e at x."""
    quartic_term = a * x**4
    cubic_term = b * x**3
    quadratic_term = c * x**2
    linear_term = d * x
    return quartic_term + cubic_term + quadratic_term + linear_term + e
# f5: quintic polynomial
def f5(x, a, b, c, d, e, f):
    """Evaluate a*x**5 + b*x**4 + c*x**3 + d*x**2 + e*x + f at x."""
    quintic_term = a * x**5
    quartic_term = b * x**4
    cubic_term = c * x**3
    quadratic_term = d * x**2
    linear_term = e * x
    return quintic_term + quartic_term + cubic_term + quadratic_term + linear_term + f

# Fit-error measure used to compare the fitted curves
def error(y1, y2):
    """Return the sum of squared element-wise differences between y1 and y2.

    Both arguments are converted to NumPy arrays first, so plain lists are
    accepted.
    """
    diff = np.asarray(y1) - np.asarray(y2)
    # Dot product of the difference vector with itself = sum of its squares.
    return np.dot(diff, diff)
    

# Fit polynomials of degree 2 to 5 to (x, y) and compare them in a 2x2 grid.
plt.figure(figsize=(10, 10))  # figure size
# Every panel uses the same fit/plot recipe, so loop over the model functions
# instead of repeating the stanza four times.
for panel, model in enumerate((f2, f3, f4, f5), start=1):
    plt.subplot(2, 2, panel)  # panel 1..4 of the 2x2 grid
    plt.scatter(x, y)  # raw samples
    # popt holds the fitted parameters, in the order of the model's signature.
    popt, pcov = curve_fit(model, x, y)
    y1 = [model(i, *popt) for i in x]  # fitted value at every sample x
    plt.plot(x, y1, 'r--')  # fitted curve
    # Title is the value of error(y, y1): the sum of squared differences
    # between the samples and the fitted values.
    plt.title(str(error(y, y1)))

Text(0.5, 1.0, '38.052046270695655')


# f1: power-law model with an offset
def f1(x, a, b):
    """Evaluate x**a + b, where a is the exponent and b a constant offset."""
    powered = x**a
    return powered + b
# f2: quadratic without a linear term
# NOTE: this rebinds the name f2, replacing the three-coefficient quadratic
# defined earlier in this file.
def f2(x, a, b):
    """Evaluate a*x**2 + b at x."""
    scaled_square = a * x**2
    return scaled_square + b
# Build noisy samples of f1 (a=2.5, b=1.3) on 50 points in [0, 4] and draw them
# as a solid blue line.
xdata = np.linspace(0, 4, 50)
y = f1(xdata, 2.5, 1.3)  # noise-free curve
ydata = y + 4 * np.random.normal(size=len(xdata))  # add normal noise scaled by 4
plt.plot(xdata, ydata, 'b-')

# Fit both candidate models to the same noisy samples.
popt1, pcov1 = curve_fit(f1, xdata, ydata)   # fit with f1
popt2, pcov2 = curve_fit(f2, xdata, ydata)    # fit with f2

# Evaluate each fitted model on the sample grid and overlay it as a dashed line.
y1 = [f1(xi, *popt1) for xi in xdata]
y2 = [f2(xi, *popt2) for xi in xdata]
plt.plot(xdata, y1, '--', label='exp')
plt.plot(xdata, y2, '--', label='para')
plt.legend()

<matplotlib.legend.Legend at 0x2814ab8ebe0>

数学建模

模型知识点

预测模型-线性回归-基本理论

线性回归¶

一元线性回归模型¶

多元线性回归模型¶

非线性拟合¶

多项式拟合（Polynomial regression）¶

一元其他非线性函数拟合¶

Questions???